---
title: "R Notebook"
output: html_notebook
---

```{r}

rm(list = ls())

library(tidyverse)
library(readr)
library(reldist)
library(openxlsx)
library(spatstat)
library(quantreg)
library(ggthemes)
library(scales)

setwd("C:/Users/PeterLimerick/Desktop/CAPITA-master/Output analysis/")

weighted.geomean <- function(x, w, ...) exp(weighted.mean(log(x), w, ...))


```

# Getting outfiles in shape

```{r}
outfile_base <- read_csv("CAPITA_OUTFILE_BASE.csv")

select <- outfile_base %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("HHWeightSh"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^HHID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Base")

select_nd <- unique(select$HHID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")

colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(HHWeightSh = HHWeightSh)

labour_base <- labour

remove(outfile_base)
```


```{r}
outfile_ubi <- read_csv("CAPITA_OUTFILE_UBI.csv")

select <- outfile_ubi %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("HHWeightSh"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^HHID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Sim")

select_nd <- unique(select$HHID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")

colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(HHWeightSh = HHWeightSh)

labour_ubi <- labour

write_csv(labour_ubi, "UBI.csv")

remove(outfile_ubi)
```



```{r}
outfile_m4 <- read_csv("CAPITA_OUTFILE_M4.csv")

select <- outfile_m4 %>% select(contains("LfStatr_"), contains("LfStats_"), contains("LfStat1_"), contains("LfStat2_"),
                                   contains("LfStat3_"), contains("LfStat4_"), contains("NsaSW"), 
                                   matches("AllowType"), matches("PenType"), matches("StudyType._"), matches("ActualAge._"),
                                   matches("ParPaySW._"), contains("HHWeightSh"), contains("IncDispA"),  contains("IncPrivA"),
                                   matches("^HHID"), contains("IUTypeS"), contains("DepsUnder15"), contains("IUPosS"),
                                  contains("JspTotA"), contains("YaTotA"), contains("FtbaA"), contains("FtbbA"), contains("YaRand"))

colnames(select) <- colnames(select) %>% str_remove("_Sim")

select_nd <- unique(select$HHID)

## We note that each line is an income unit, and each income unit will have a number of DepsUnder15 not noted within the income unit.

labour_r <- select %>% select(matches("r$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15"))

labour_s <- select %>% select(matches("s$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15"))

labour_1 <- select %>% select(matches("1$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_2 <- select %>% select(matches("2$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_3 <- select %>% select(matches("3$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

labour_4 <- select %>% select(matches("4$"), "HHWeightSh", contains("HHID"), contains("DepsUnder15")) %>% mutate(JspTotA = 0, FtbaA = 0, FtbbA = 0)

colnames(labour_r) <- colnames(labour_r) %>% str_remove("r$")

colnames(labour_s) <- colnames(labour_s) %>% str_remove("s$")

colnames(labour_1) <- colnames(labour_1) %>% str_remove("1$")

colnames(labour_2) <- colnames(labour_2) %>% str_remove("2$")

colnames(labour_3) <- colnames(labour_3) %>% str_remove("3$")

colnames(labour_4) <- colnames(labour_4) %>% str_remove("4$")

labour <- labour_r %>% rbind(labour_s, labour_1, labour_2, labour_3, labour_4)

labour <- labour %>% filter(!is.na(LfStat)) %>% rename(HHWeightSh = HHWeightSh)


labour_m4 <- labour

remove(outfile_m4, labour, labour_r, labour_s, labour_1, labour_2, labour_3, labour_4)
```

# RUN ALWAYS - Making combined dataset 'compare' (and compare_csv for debugging)

```{r}
labour_base <- labour_base %>% mutate(Model = "base")
labour_ubi <- labour_ubi %>% mutate(Model = "ubi")
labour_gmi <- labour_m4 %>% mutate(Model = "gmi")

compare <- bind_rows(labour_base, labour_ubi, labour_gmi)

```

# RUN ALWAYS - Equivalised incomes

Using the ABS OECD-modified equivalence scale, this is 1 for lone person, and 0.5 for each additional person 15 years or older, 0.3 for each child under 15 years. 

NOTE THAT compare_equiv displays income unit information. As such, it is useful for quick income unit comparisons between models, with no further need for grouping.

```{r}
compare <- compare %>% 
  group_by(Model, HHID) %>%
  mutate(equivalence_factor = case_when(
  row_number() == 1 ~ 1 + DepsUnder15*0.3,
  TRUE ~ 0.5
))

compare_equiv <- compare %>% mutate(Pension_age = ifelse(ActualAge > "65",1,0),
                                    BI_total = YaTotA+JspTotA,
                                    Number_earning_income = ifelse(IncPrivA < 10400, 0, 1)) %>%
                            mutate(under_18_BI_total = ifelse(ActualAge < 18, BI_total,0)) %>%
  group_by(Model, HHID) %>%
  summarise(
  DispInc = sum(IncDispA), 
  PrivInc = sum(IncPrivA), 
  HHWeightSh = mean(HHWeightSh),
  age = mean(ActualAge),
  Pension_age = sum(Pension_age),
  equivalence_factor = sum(equivalence_factor),
  IUTType = mean(IUTypeS),
  FTB = sum(FtbaA+FtbbA),
  BI_total = sum(BI_total),
  under_18_BI_total = sum(under_18_BI_total),
  Num_earning_income = sum(Number_earning_income),
  famsize = n()+max(DepsUnder15)
  )

compare_equiv <- compare_equiv %>% mutate(equivalised_DispInc = DispInc/equivalence_factor)

compare_equiv <- compare_equiv %>%           # HERE I AM CREATING NEW EASIER TO UNDERSTAND INCOME UNITS
 mutate(IU_earners = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 & Num_earning_income < 1 ~ "wCouple without a significant income earner",
    IUTType == 2 & Num_earning_income < 2 ~ "Single income couple without dependent children",
    IUTType == 2 ~ "Dual income couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>%
   mutate(IU_big = as.factor(case_when(
    Pension_age == 1 & IUTType >= 3 ~ "Single above pension age",
    Pension_age >= 1 ~ "Couple above pension age",
    IUTType == 1 ~ "Couple with dependent children",
    IUTType == 2 ~ "Couple without dependent children",
    IUTType == 3 ~ "Lone parent with dependent children",
    TRUE ~ "Lone person"
  ))) %>% mutate(famHHWeightSh = HHWeightSh*famsize)

base_equiv <- compare_equiv %>% filter(Model == "base")
ubi_equiv <- compare_equiv %>% filter(Model == "ubi")
gmi_equiv <- compare_equiv %>% filter(Model == "gmi")


equiv_gini_base <- gini(base_equiv$equivalised_DispInc, base_equiv$HHWeightSh)
equiv_gini_gmi <- gini(gmi_equiv$equivalised_DispInc, gmi_equiv$HHWeightSh)
equiv_gini_ubi <- gini(ubi_equiv$equivalised_DispInc, ubi_equiv$HHWeightSh)



```



```{r}

compare_equiv %>% group_by(Model) %>% 
  mutate(equivalised_DispInc = case_when(
    equivalised_DispInc <= 1 ~ 1,
    TRUE ~ equivalised_DispInc)) %>%
  summarise(median_disp_inc = weighted.median(DispInc, HHWeightSh),
            median_equiv_inc = weighted.median(equivalised_DispInc, HHWeightSh), 
            mean_equiv_inc = weighted.mean(equivalised_DispInc, HHWeightSh), 
            geomean_equiv_inc = weighted.geomean(equivalised_DispInc, HHWeightSh), 
            geometric_arithmetic_ratio = geomean_equiv_inc/mean_equiv_inc
  )
            

```
